R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for the `cars` data set; the output
# below shows its two columns, speed and dist.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Read the daily station records for Los Angeles from CSV and inspect them.
# NOTE(review): the path is absolute and machine-specific.
climate_data <- read.csv(
  file = "/home/CAMPUS/cdma2019/ChristinaMarsh_LosAngeles_data.csv"
)
# First rows, column structure and column names.
head(climate_data)
##       STATION                            NAME       DATE PRCP TAVG TMAX TMIN
## 1 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-01   NA   NA 16.1  8.9
## 2 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-02   NA   NA 18.3  6.7
## 3 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-03   NA   NA 18.9  8.3
## 4 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-04  7.6   NA 17.8 11.1
## 5 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-05 15.2   NA 18.9 10.0
## 6 USW00093134 LOS ANGELES DOWNTOWN USC, CA US 1906-04-06   NA   NA 15.6 10.0
str(climate_data)
## 'data.frame':    39152 obs. of  7 variables:
##  $ STATION: Factor w/ 1 level "USW00093134": 1 1 1 1 1 1 1 1 1 1 ...
##  $ NAME   : Factor w/ 1 level "LOS ANGELES DOWNTOWN USC, CA US": 1 1 1 1 1 1 1 1 1 1 ...
##  $ DATE   : Factor w/ 39152 levels "1906-04-01","1906-04-02",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ PRCP   : num  NA NA NA 7.6 15.2 NA NA NA NA NA ...
##  $ TAVG   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ TMAX   : num  16.1 18.3 18.9 17.8 18.9 15.6 16.7 19.4 17.8 16.7 ...
##  $ TMIN   : num  8.9 6.7 8.3 11.1 10 10 12.2 13.3 12.8 12.8 ...
names(climate_data)
## [1] "STATION" "NAME"    "DATE"    "PRCP"    "TAVG"    "TMAX"    "TMIN"
# First look at TMAX against DATE; DATE is still a factor at this point
# (see the str() output above), not a parsed date.
plot(TMAX ~ DATE, data = climate_data)

# TMAX contains missing values, so a plain min() only returns NA; drop the
# missing entries to obtain the actual minimum.
min(climate_data$TMAX, na.rm = TRUE)
# DATE was read in as a factor; go through character to get real Date values.
strDates <- as.character(climate_data$DATE)
climate_data$NewDate <- as.Date(strDates, format = "%Y-%m-%d")
# Line plot of TMAX for the first 1835 daily records.
plot(TMAX ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Fit the linear trend of daily TMAX against date once and keep the model.
TMAX.lm <- lm(TMAX ~ NewDate, data = climate_data)
plot(TMAX ~ NewDate, data = climate_data, las = 1)

plot(TMAX ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Print the stored fit rather than refitting the identical model.
TMAX.lm
## 
## Call:
## lm(formula = TMAX ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   2.362e+01    5.643e-05
plot(TMAX ~ NewDate, data = climate_data, las = 1)

# Month ("01".."12") and year labels taken from the parsed date; NewDate is
# already a Date, so no further conversion is needed.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
# Mean TMAX for every month of every year.
MonthlyTMAXMean <- aggregate(TMAX ~ Month + Year, data = climate_data, FUN = mean)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  3 variables:
##  $ Month: chr  "04" "05" "06" "07" ...
##  $ Year : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX : num  20.4 20.4 26.3 28.6 29.3 ...
# Numeric copies of the character labels, used later as regression and
# filtering variables.
MonthlyTMAXMean$Year.num <- as.numeric(MonthlyTMAXMean$Year)
MonthlyTMAXMean$Month.num <- as.numeric(MonthlyTMAXMean$Month)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  5 variables:
##  $ Month    : chr  "04" "05" "06" "07" ...
##  $ Year     : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX     : num  20.4 20.4 26.3 28.6 29.3 ...
##  $ Year.num : num  1906 1906 1906 1906 1906 ...
##  $ Month.num: num  4 5 6 7 8 9 10 11 12 1 ...
plot(MonthlyTMAXMean$TMAX, type = "l")

# TMAX for the first 1835 daily records, drawn as a line.
plot(TMAX ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Linear trend of daily TMAX against date, printed to the console.
lm(TMAX ~ NewDate, data = climate_data)
## 
## Call:
## lm(formula = TMAX ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   2.362e+01    5.643e-05
plot(TMAX ~ NewDate, data = climate_data, las = 1)

# Month and year labels from the parsed date, then the mean TMAX for each
# month of each year.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
MonthlyTMAXMean <- aggregate(TMAX ~ Month + Year, data = climate_data, FUN = mean)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  3 variables:
##  $ Month: chr  "04" "05" "06" "07" ...
##  $ Year : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX : num  20.4 20.4 26.3 28.6 29.3 ...
# Numeric versions of the character labels.
MonthlyTMAXMean$Year.num <- as.numeric(MonthlyTMAXMean$Year)
MonthlyTMAXMean$Month.num <- as.numeric(MonthlyTMAXMean$Month)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  5 variables:
##  $ Month    : chr  "04" "05" "06" "07" ...
##  $ Year     : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX     : num  20.4 20.4 26.3 28.6 29.3 ...
##  $ Year.num : num  1906 1906 1906 1906 1906 ...
##  $ Month.num: num  4 5 6 7 8 9 10 11 12 1 ...
plot(MonthlyTMAXMean$TMAX, type = "l")

# August only: monthly mean TMAX by year, its linear trend, and the fitted
# line overlaid in red.
plot(
  TMAX ~ Year.num,
  data = MonthlyTMAXMean[MonthlyTMAXMean$Month == "08", ],
  type = "l",
  xlim = c(1906, 2014)
)
August.lm <- lm(TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month == "08", ])
summary(August.lm)
## 
## Call:
## lm(formula = TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month == 
##     "08", ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4545 -1.0434 -0.1541  0.9059  3.4437 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -16.314846   8.024619  -2.033   0.0445 *  
## Year.num      0.022823   0.004081   5.592 1.76e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.359 on 106 degrees of freedom
## Multiple R-squared:  0.2278, Adjusted R-squared:  0.2205 
## F-statistic: 31.27 on 1 and 106 DF,  p-value: 1.762e-07
abline(coef(August.lm), col = "red")

# TMAX for the first 1835 daily records, drawn as a line.
plot(TMAX ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Linear trend of daily TMAX against date, printed to the console.
lm(TMAX ~ NewDate, data = climate_data)
## 
## Call:
## lm(formula = TMAX ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   2.362e+01    5.643e-05
plot(TMAX ~ NewDate, data = climate_data, las = 1)

# Month and year labels from the parsed date, then the mean TMAX for each
# month of each year.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
MonthlyTMAXMean <- aggregate(TMAX ~ Month + Year, data = climate_data, FUN = mean)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  3 variables:
##  $ Month: chr  "04" "05" "06" "07" ...
##  $ Year : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX : num  20.4 20.4 26.3 28.6 29.3 ...
# Numeric versions of the character labels.
MonthlyTMAXMean$Year.num <- as.numeric(MonthlyTMAXMean$Year)
MonthlyTMAXMean$Month.num <- as.numeric(MonthlyTMAXMean$Month)
str(MonthlyTMAXMean)
## 'data.frame':    1279 obs. of  5 variables:
##  $ Month    : chr  "04" "05" "06" "07" ...
##  $ Year     : chr  "1906" "1906" "1906" "1906" ...
##  $ TMAX     : num  20.4 20.4 26.3 28.6 29.3 ...
##  $ Year.num : num  1906 1906 1906 1906 1906 ...
##  $ Month.num: num  4 5 6 7 8 9 10 11 12 1 ...
plot(MonthlyTMAXMean$TMAX, type = "l")

# September only: monthly mean TMAX by year, its linear trend, and the fitted
# line overlaid in red.
plot(
  TMAX ~ Year.num,
  data = MonthlyTMAXMean[MonthlyTMAXMean$Month == "09", ],
  type = "l",
  xlim = c(1906, 2014)
)
September.lm <- lm(TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month == "09", ])
summary(September.lm)
## 
## Call:
## lm(formula = TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month == 
##     "09", ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4906 -1.1184 -0.1358  1.3103  4.1107 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15.980408  10.389510  -1.538    0.127    
## Year.num      0.022327   0.005281   4.228 5.09e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.703 on 104 degrees of freedom
## Multiple R-squared:  0.1467, Adjusted R-squared:  0.1385 
## F-statistic: 17.87 on 1 and 104 DF,  p-value: 5.086e-05
abline(coef(September.lm), col = "red")

# TMIN for the first 1835 daily records, drawn as a line.
plot(TMIN ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Linear trend of daily TMIN against date, printed to the console.
lm(TMIN ~ NewDate, data = climate_data)
## 
## Call:
## lm(formula = TMIN ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   1.345e+01    5.375e-05
plot(TMIN ~ NewDate, data = climate_data, las = 1)

# Month and year labels from the parsed date, then the mean TMIN for each
# month of each year, with numeric copies of the labels.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
MonthlyTMINMean <- aggregate(TMIN ~ Month + Year, data = climate_data, FUN = mean)
MonthlyTMINMean$Year.num <- as.numeric(MonthlyTMINMean$Year)
MonthlyTMINMean$Month.num <- as.numeric(MonthlyTMINMean$Month)
head(MonthlyTMINMean)
##   Month Year     TMIN Year.num Month.num
## 1    04 1906 13.09333     1906         4
## 2    05 1906 14.12258     1906         5
## 3    06 1906 18.07000     1906         6
## 4    07 1906 21.21290     1906         7
## 5    08 1906 19.69032     1906         8
## 6    09 1906 17.87333     1906         9
plot(MonthlyTMINMean$TMIN, type = "l")

# TMIN for the first 1835 daily records, drawn as a line.
plot(TMIN ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Linear trend of daily TMIN against date, printed to the console.
lm(TMIN ~ NewDate, data = climate_data)
## 
## Call:
## lm(formula = TMIN ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   1.345e+01    5.375e-05
plot(TMIN ~ NewDate, data = climate_data, las = 1)

# Month and year labels from the parsed date, then the mean TMIN for each
# month of each year.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
MonthlyTMINMean <- aggregate(TMIN ~ Month + Year, data = climate_data, FUN = mean)
str(MonthlyTMINMean)
## 'data.frame':    1279 obs. of  3 variables:
##  $ Month: chr  "04" "05" "06" "07" ...
##  $ Year : chr  "1906" "1906" "1906" "1906" ...
##  $ TMIN : num  13.1 14.1 18.1 21.2 19.7 ...
# Numeric versions of the character labels.
MonthlyTMINMean$Year.num <- as.numeric(MonthlyTMINMean$Year)
MonthlyTMINMean$Month.num <- as.numeric(MonthlyTMINMean$Month)
str(MonthlyTMINMean)
## 'data.frame':    1279 obs. of  5 variables:
##  $ Month    : chr  "04" "05" "06" "07" ...
##  $ Year     : chr  "1906" "1906" "1906" "1906" ...
##  $ TMIN     : num  13.1 14.1 18.1 21.2 19.7 ...
##  $ Year.num : num  1906 1906 1906 1906 1906 ...
##  $ Month.num: num  4 5 6 7 8 9 10 11 12 1 ...
plot(MonthlyTMINMean$TMIN, type = "l")

# September only: monthly mean TMIN by year, its linear trend, and the fitted
# line overlaid in red. This assigns to the name September.lm.
plot(
  TMIN ~ Year.num,
  data = MonthlyTMINMean[MonthlyTMINMean$Month == "09", ],
  type = "l",
  xlim = c(1906, 2014)
)
September.lm <- lm(TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month == "09", ])
summary(September.lm)
## 
## Call:
## lm(formula = TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month == 
##     "09", ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9099 -0.8724 -0.0956  0.7886  4.5072 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -43.634750   8.280146  -5.270 7.44e-07 ***
## Year.num      0.030972   0.004209   7.359 4.48e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.357 on 104 degrees of freedom
## Multiple R-squared:  0.3424, Adjusted R-squared:  0.3361 
## F-statistic: 54.15 on 1 and 104 DF,  p-value: 4.483e-11
abline(coef(September.lm), col = "red")

# TMIN for the first 1835 daily records, drawn as a line.
plot(TMIN ~ NewDate, data = climate_data[1:1835, ], type = "l")

# Linear trend of daily TMIN against date, printed to the console.
lm(TMIN ~ NewDate, data = climate_data)
## 
## Call:
## lm(formula = TMIN ~ NewDate, data = climate_data)
## 
## Coefficients:
## (Intercept)      NewDate  
##   1.345e+01    5.375e-05
plot(TMIN ~ NewDate, data = climate_data, las = 1)

# Month and year labels from the parsed date, then the mean TMIN for each
# month of each year.
climate_data$Month <- format(climate_data$NewDate, format = "%m")
climate_data$Year <- format(climate_data$NewDate, format = "%Y")
MonthlyTMINMean <- aggregate(TMIN ~ Month + Year, data = climate_data, FUN = mean)
str(MonthlyTMINMean)
## 'data.frame':    1279 obs. of  3 variables:
##  $ Month: chr  "04" "05" "06" "07" ...
##  $ Year : chr  "1906" "1906" "1906" "1906" ...
##  $ TMIN : num  13.1 14.1 18.1 21.2 19.7 ...
# Numeric versions of the character labels.
MonthlyTMINMean$Year.num <- as.numeric(MonthlyTMINMean$Year)
MonthlyTMINMean$Month.num <- as.numeric(MonthlyTMINMean$Month)
str(MonthlyTMINMean)
## 'data.frame':    1279 obs. of  5 variables:
##  $ Month    : chr  "04" "05" "06" "07" ...
##  $ Year     : chr  "1906" "1906" "1906" "1906" ...
##  $ TMIN     : num  13.1 14.1 18.1 21.2 19.7 ...
##  $ Year.num : num  1906 1906 1906 1906 1906 ...
##  $ Month.num: num  4 5 6 7 8 9 10 11 12 1 ...
plot(MonthlyTMINMean$TMIN, type = "l")

# August only: monthly mean TMIN by year, its linear trend, and the fitted
# line overlaid in red. This assigns to the name August.lm.
plot(
  TMIN ~ Year.num,
  data = MonthlyTMINMean[MonthlyTMINMean$Month == "08", ],
  type = "l",
  xlim = c(1906, 2014)
)
August.lm <- lm(TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month == "08", ])
summary(August.lm)
## 
## Call:
## lm(formula = TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month == 
##     "08", ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3968 -0.9869 -0.2585  0.8026  3.5369 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -38.814426   7.708758  -5.035 1.97e-06 ***
## Year.num      0.028839   0.003921   7.356 4.20e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.306 on 106 degrees of freedom
## Multiple R-squared:  0.338,  Adjusted R-squared:  0.3317 
## F-statistic: 54.11 on 1 and 106 DF,  p-value: 4.201e-11
abline(coef(August.lm), col = "red")

# Mean TMIN for each month of each year, with numeric copies of the labels.
MonthlyTMINMean <- aggregate(TMIN ~ Month + Year, data = climate_data, FUN = mean)
MonthlyTMINMean$Year.num <- as.numeric(MonthlyTMINMean$Year)
MonthlyTMINMean$Month.num <- as.numeric(MonthlyTMINMean$Month)
head(MonthlyTMINMean)
##   Month Year     TMIN Year.num Month.num
## 1    04 1906 13.09333     1906         4
## 2    05 1906 14.12258     1906         5
## 3    06 1906 18.07000     1906         6
## 4    07 1906 21.21290     1906         7
## 5    08 1906 19.69032     1906         8
## 6    09 1906 17.87333     1906         9
plot(MonthlyTMINMean$TMIN, type = "l")

# One panel per calendar month: monthly mean TMAX by year with its linear
# trend in red, collecting slope, p-value and R-squared in TMAXresult.
Months <- c(
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December"
)
par(mfrow = c(4, 3), mar = c(5, 4, 3, 2) + 0.1)
# The NA seed row is kept so TMAXresult has the same layout as before:
# one leading NA row followed by one row per month.
TMAXresult <- NA
# The braces are essential: without them only plot() is repeated and the
# fit, trend line and result row are produced once, for the last month only.
for (i in seq_along(Months)) {
  plot(
    TMAX ~ Year.num,
    data = MonthlyTMAXMean[MonthlyTMAXMean$Month.num == i, ],
    type = "l", las = 1, xlim = c(1906, 2014), main = Months[i],
    ylim = c(5, 35)
  )
  Month.lm <- lm(TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month.num == i, ])
  abline(coef(Month.lm), col = "red")
  TMAXresult <- rbind(
    TMAXresult,
    cbind(
      Months[i],
      round(coef(Month.lm)[2], 4),
      round(summary(Month.lm)$coefficients[2, 4], 4),
      round(summary(Month.lm)$r.squared, 3)
    )
  )
}
# After the loop Month.lm holds the last fit (i == 12, December), which is
# the model summarised below.
summary(Month.lm)
## 
## Call:
## lm(formula = TMAX ~ Year.num, data = MonthlyTMAXMean[MonthlyTMAXMean$Month.num == 
##     i, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7267 -1.3327 -0.2605  1.1234  3.9699 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.06692   10.46142  -0.006    0.995  
## Year.num     0.01008    0.00532   1.894    0.061 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.72 on 104 degrees of freedom
## Multiple R-squared:  0.03333,    Adjusted R-squared:  0.02404 
## F-statistic: 3.586 on 1 and 104 DF,  p-value: 0.06105
# Mean TMIN for each month of each year, with numeric copies of the labels.
MonthlyTMINMean <- aggregate(TMIN ~ Month + Year, data = climate_data, FUN = mean)
MonthlyTMINMean$Year.num <- as.numeric(MonthlyTMINMean$Year)
MonthlyTMINMean$Month.num <- as.numeric(MonthlyTMINMean$Month)
head(MonthlyTMINMean)
##   Month Year     TMIN Year.num Month.num
## 1    04 1906 13.09333     1906         4
## 2    05 1906 14.12258     1906         5
## 3    06 1906 18.07000     1906         6
## 4    07 1906 21.21290     1906         7
## 5    08 1906 19.69032     1906         8
## 6    09 1906 17.87333     1906         9
plot(MonthlyTMINMean$TMIN, type = "l")

# One panel per calendar month: monthly mean TMIN by year with its linear
# trend in red, collecting slope, p-value and R-squared in TMINresult.
Months <- c(
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December"
)
par(mfrow = c(4, 3), mar = c(5, 4, 3, 2) + 0.1)
# The NA seed row is kept so TMINresult has the same layout as before:
# one leading NA row followed by one row per month.
TMINresult <- NA
# The braces are essential: without them only plot() is repeated and the
# fit, trend line and result row are produced once, for the last month only.
for (i in seq_along(Months)) {
  plot(
    TMIN ~ Year.num,
    data = MonthlyTMINMean[MonthlyTMINMean$Month.num == i, ],
    type = "l", las = 1, xlim = c(1906, 2014), main = Months[i]
  )
  Month.lm <- lm(TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month.num == i, ])
  abline(coef(Month.lm), col = "red")
  TMINresult <- rbind(
    TMINresult,
    cbind(
      Months[i],
      round(coef(Month.lm)[2], 4),
      round(summary(Month.lm)$coefficients[2, 4], 4),
      round(summary(Month.lm)$r.squared, 3)
    )
  )
}
# After the loop Month.lm holds the last fit (i == 12, December), which is
# the model summarised below.
summary(Month.lm)
## 
## Call:
## lm(formula = TMIN ~ Year.num, data = MonthlyTMINMean[MonthlyTMINMean$Month.num == 
##     i, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6965 -0.9602 -0.1668  0.9987  2.6743 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 15.343989   8.208805   1.869   0.0644 .
## Year.num    -0.002966   0.004175  -0.710   0.4790  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.35 on 104 degrees of freedom
## Multiple R-squared:  0.00483,    Adjusted R-squared:  -0.004739 
## F-statistic: 0.5048 on 1 and 104 DF,  p-value: 0.479
# Treat the -9999 sentinel as missing. which() drops the NA comparisons that
# arise because PRCP already contains NA values.
climate_data$PRCP[which(climate_data$PRCP == -9999)] <- NA
# Number of missing daily precipitation values per month of each year.
# Group.1 is the month label and Group.2 the year label.
Missing <- aggregate(
  is.na(climate_data$PRCP),
  list(climate_data$Month, climate_data$Year),
  sum
)
# Decimal year: the year plus the fraction of the year elapsed at the start
# of the month (January = year + 0, December = year + 11/12).
Missing$Date <- as.numeric(Missing$Group.2) + (as.numeric(Missing$Group.1) - 1) / 12
plot(x ~ Date, data = Missing)

# Total precipitation per month of each year, ignoring missing days.
TotalPPT <- aggregate(
  climate_data$PRCP,
  list(climate_data$Month, climate_data$Year),
  sum,
  na.rm = TRUE
)
names(TotalPPT) <- c("Group.1", "Group.2", "ppt")
# Keep only the months with fewer than 5 missing daily values.
NonMissing <- Missing[Missing$x < 5, c("Group.1", "Group.2", "x")]
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Attach the totals to the retained months, matching on month and year.
PPT <- merge(TotalPPT, NonMissing, by = c("Group.1", "Group.2"), all.y = TRUE)
PPT$Date <- as.numeric(PPT$Group.2) + (as.numeric(PPT$Group.1) - 1) / 12
head(PPT)
##   Group.1 Group.2   ppt x Date
## 1      01    1907 178.4 0 1907
## 2      01    1908 127.9 0 1908
## 3      01    1909 184.8 0 1909
## 4      01    1910  38.9 0 1910
## 5      01    1911 170.2 0 1911
## 6      01    1912   1.8 0 1912
# Monthly totals over time with the overall mean drawn as a blue line.
PRCP_mean <- mean(PPT$ppt)
plot(ppt ~ Date, data = PPT)
abline(h = PRCP_mean, col = "blue")